/*
 * $Id: Perl5Matcher.java,v 1.27 2003/11/07 20:16:25 dfs Exp $
 *
 * ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2000 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation", "Jakarta-Oro" 
 *    must not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache" 
 *    or "Jakarta-Oro", nor may "Apache" or "Jakarta-Oro" appear in their 
 *    name, without prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */


package org.apache.oro.text.regex;

import java.util.*;

/**
 * The Perl5Matcher class is used to match regular expressions
 * (conforming to the Perl5 regular expression syntax) generated by
 * Perl5Compiler.
 * <p>
 * Perl5Compiler and Perl5Matcher are designed with the intent that
 * you use a separate instance of each per thread to avoid the overhead
 * of both synchronization and concurrent access (e.g., a match that takes
 * a long time in one thread will block the progress of another thread with
 * a shorter match).  If you want to use a single instance of each
 * in a concurrent program, you must appropriately protect access to
 * the instances with critical sections.  If you want to share Perl5Pattern
 * instances between concurrently executing instances of Perl5Matcher, you
 * must compile the patterns with {@link Perl5Compiler#READ_ONLY_MASK}.
 *
 * @version @version@
 * @since 1.0
 * @see PatternMatcher
 * @see Perl5Compiler
 */
public final class Perl5Matcher implements PatternMatcher {
  private static final char __EOS = Character.MAX_VALUE;
  private static final int __INITIAL_NUM_OFFSETS = 20;

  private boolean __multiline = false, __lastSuccess = false;
  private boolean __caseInsensitive = false;
  private char __previousChar, __input[], __originalInput[];
  private Perl5Repetition __currentRep;
  private int __numParentheses, __bol, __eol, __currentOffset, __endOffset;

  private char[] __program;
  private int __expSize, __inputOffset, __lastParen;
  private int[] __beginMatchOffsets, __endMatchOffsets;
  private Stack __stack = new Stack();
  private Perl5MatchResult __lastMatchResult = null;

  private static boolean
    __compare(char[] s1, int s1Offs, char[] s2, int s2Offs, int n)
  {
    int cnt;

    for(cnt = 0; cnt < n; cnt++, s1Offs++, s2Offs++) {
      if(s1Offs >= s1.length)
	return false;
      if(s2Offs >= s2.length)
	return false;
      if(s1[s1Offs] != s2[s2Offs])
	return false;
    }

    return true;
  }

  private static int __findFirst(char[] input, int current, int endOffset,
				 char[] mustString)
  {
    int count, saveCurrent;
    char ch;


    if(input.length == 0)
      return endOffset;

    ch = mustString[0];
    // Find the offset of the first character of the must string
    while(current < endOffset) {
      if(ch == input[current]){
	saveCurrent = current;
	count = 0;

	while(current < endOffset && count < mustString.length) {
	  if(mustString[count] != input[current])
	    break;
	  ++count;
	  ++current;
	}

	current = saveCurrent;

	if(count >= mustString.length)
	  break;
      }
      ++current;
    }

    return current;
  }


  private void __pushState(int parenFloor) {
    int[] state;
    int stateEntries, paren;

    stateEntries = 3*(__expSize - parenFloor);
    if(stateEntries <= 0)
      state = new int[3];
    else
      state = new int[stateEntries + 3];

    state[0] = __expSize;
    state[1] = __lastParen;
    state[2] = __inputOffset;

    for(paren = __expSize; paren > parenFloor; --paren, stateEntries-=3) {
      state[stateEntries]     = __endMatchOffsets[paren];
      state[stateEntries + 1] = __beginMatchOffsets[paren];
      state[stateEntries + 2] = paren;
    }

    __stack.push(state);
  }


  private void __popState() {
    int[] state;
    int entry, paren;

    state = (int[])__stack.pop();

    __expSize     = state[0];
    __lastParen   = state[1];
    __inputOffset = state[2];

    for(entry = 3; entry < state.length; entry+=3) {
      paren = state[entry + 2];
      __beginMatchOffsets[paren] = state[entry + 1];

      if(paren <= __lastParen)
	__endMatchOffsets[paren] = state[entry];
    }

    for(paren = __lastParen + 1; paren <= __numParentheses; paren++) {
      if(paren > __expSize)
	__beginMatchOffsets[paren] = OpCode._NULL_OFFSET;
      __endMatchOffsets[paren] = OpCode._NULL_OFFSET;
    }
  }


  // Initialize globals needed before calling __tryExpression for first time
  private void __initInterpreterGlobals(Perl5Pattern expression, char[] input,
					int beginOffset, int endOffset,
					int currentOffset)
  {
    // Remove this hack after more efficient case-folding and unicode
    // character classes are implemented
    __caseInsensitive            = expression._isCaseInsensitive;
    __input                      = input;
    __endOffset                  = endOffset;
    __currentRep                 = new Perl5Repetition();
    __currentRep._numInstances   = 0;
    __currentRep._lastRepetition = null;
    __program                    = expression._program;
    __stack.setSize(0);

    // currentOffset should always be >= beginOffset and should
    // always be equal to zero when beginOffset equals 0, but we
    // make a weak attempt to protect against a violation of this
    // precondition
    if(currentOffset == beginOffset || currentOffset <= 0)
      __previousChar = '\n';
    else {
      __previousChar = input[currentOffset - 1];
      if(!__multiline && __previousChar == '\n')
	__previousChar = '\0';
    }

    __numParentheses    = expression._numParentheses;
    __currentOffset     = currentOffset;

    __bol = beginOffset;
    __eol = endOffset;

    // Ok, here we're using endOffset as a temporary variable.
    endOffset = __numParentheses + 1;
    if(__beginMatchOffsets == null || endOffset > __beginMatchOffsets.length) {
      if(endOffset < __INITIAL_NUM_OFFSETS)
	endOffset = __INITIAL_NUM_OFFSETS;
      __beginMatchOffsets = new int[endOffset];
      __endMatchOffsets   = new int[endOffset];      
    }
  }

  // Set the match result information.  Only call this if we successfully
  // matched.
  private void __setLastMatchResult() {
    int offs, maxEndOffs = 0;

    // endOffset+=dontTry;

    __lastMatchResult = new Perl5MatchResult(__numParentheses + 1);

    // This can happen when using Perl5StreamInput
    if(__endMatchOffsets[0] > __originalInput.length)
      throw new ArrayIndexOutOfBoundsException();

    __lastMatchResult._matchBeginOffset = __beginMatchOffsets[0];

    while(__numParentheses >= 0) {
      offs = __beginMatchOffsets[__numParentheses];

      if(offs >= 0)
	__lastMatchResult._beginGroupOffset[__numParentheses] =
	  offs - __lastMatchResult._matchBeginOffset;
      else
	__lastMatchResult._beginGroupOffset[__numParentheses] =
	  OpCode._NULL_OFFSET;

      offs = __endMatchOffsets[__numParentheses];

      if(offs >= 0) {
	__lastMatchResult._endGroupOffset[__numParentheses] =
	  offs - __lastMatchResult._matchBeginOffset;
	if(offs > maxEndOffs && offs <= __originalInput.length)
	  maxEndOffs = offs;
      } else
	__lastMatchResult._endGroupOffset[__numParentheses] =
	  OpCode._NULL_OFFSET;

      --__numParentheses;
    }

    __lastMatchResult._match =
      new String(__originalInput, __beginMatchOffsets[0],
		 maxEndOffs - __beginMatchOffsets[0]);

    // Free up for garbage collection
    __originalInput = null;
  }



  // Expects to receive a valid regular expression program.  No checking
  // is done to ensure validity.
  // __originalInput must be set before calling this method for
  // __lastMatchResult to be set correctly.
  // beginOffset marks the beginning of the string
  // currentOffset marks where to start the pattern search
  private boolean __interpret(Perl5Pattern expression, char[] input,
			      int beginOffset, int endOffset,
			      int currentOffset)
  {
    boolean success;
    int minLength = 0, dontTry = 0, offset;
    char ch, mustString[];

    __initInterpreterGlobals(expression, input, beginOffset, endOffset,
			     currentOffset);

    success = false;
    mustString = expression._mustString;

  _mainLoop:
    while(true) {

      if(mustString != null &&
	 ((expression._anchor & Perl5Pattern._OPT_ANCH) == 0 ||
	  ((__multiline || 
	   (expression._anchor & Perl5Pattern._OPT_ANCH_MBOL) != 0)
	   && expression._back >= 0)))
	{

	__currentOffset =
	  __findFirst(__input, __currentOffset, endOffset, mustString);
	
	if(__currentOffset >= endOffset) {
	  if((expression._options & Perl5Compiler.READ_ONLY_MASK) == 0)
	    expression._mustUtility++;
	  success = false;
	  break _mainLoop;
	} else if(expression._back >= 0) {
	  __currentOffset-=expression._back;
	  if(__currentOffset < currentOffset)
	    __currentOffset = currentOffset;
	  minLength = expression._back + mustString.length;
	} else if(!expression._isExpensive &&
		  (expression._options & Perl5Compiler.READ_ONLY_MASK) == 0 &&
		  (--expression._mustUtility < 0)) {
	  // Be careful!  The preceding logical expression is constructed
	  // so that mustUtility is only decremented if the expression is
	  // compiled without READ_ONLY_MASK.
	  mustString = expression._mustString = null;
	  __currentOffset = currentOffset;
	} else {
	  __currentOffset = currentOffset;
	  minLength = mustString.length;
	}
      }

      if((expression._anchor & Perl5Pattern._OPT_ANCH) != 0) {
	if(__currentOffset == beginOffset && __tryExpression(beginOffset)) {
	  success = true;
	  break _mainLoop;
	} else if(__multiline
		  || (expression._anchor & Perl5Pattern._OPT_ANCH_MBOL) != 0
		  || (expression._anchor & Perl5Pattern._OPT_IMPLICIT) != 0)
	  {
	    if(minLength > 0)
	      dontTry = minLength - 1;
	    endOffset-=dontTry;

	    if(__currentOffset > currentOffset)
	      --__currentOffset;

	    while(__currentOffset < endOffset) {
	      if(__input[__currentOffset++] == '\n') {
		if(__currentOffset < endOffset &&
		   __tryExpression(__currentOffset)) {
		  success = true;
		  break _mainLoop;
		}
	      }
	    }
	  }

	break _mainLoop;
      }


      if(expression._startString != null) {
	mustString = expression._startString;
	if((expression._anchor & Perl5Pattern._OPT_SKIP) != 0) {
	  ch = mustString[0];

	  while(__currentOffset < endOffset) {
	    if(ch == __input[__currentOffset]) {
	      if(__tryExpression(__currentOffset)){
		success = true;
		break _mainLoop;
	      }
	      ++__currentOffset;
	      while(__currentOffset < endOffset &&
		    __input[__currentOffset] == ch)
		    ++__currentOffset;
	    }
	    ++__currentOffset;
	  }
	} else {

	  while((__currentOffset =
		 __findFirst(__input, __currentOffset, endOffset, mustString))
		< endOffset){
	    if(__tryExpression(__currentOffset)) {
	      success = true;
	      break _mainLoop;
	    }
	    ++__currentOffset;
	  }
	}

	break _mainLoop;
      }

      if((offset = expression._startClassOffset) != OpCode._NULL_OFFSET) {
	boolean doEvery, tmp;
	char op;

	doEvery = ((expression._anchor & Perl5Pattern._OPT_SKIP) == 0);

	if(minLength > 0)
	  dontTry = minLength - 1;
	endOffset -= dontTry;
	tmp = true;

	switch(op = __program[offset]) {
	case OpCode._ANYOF:
	  offset = OpCode._getOperand(offset);
	  while(__currentOffset < endOffset) {
	    ch = __input[__currentOffset];

	    if(ch < 256 &&
	       (__program[offset + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }

	  break;

	case OpCode._ANYOFUN:
	case OpCode._NANYOFUN:
	  offset = OpCode._getOperand(offset);
	  while(__currentOffset < endOffset) {
	    ch = __input[__currentOffset];

	    if(__matchUnicodeClass(ch, __program, offset, op)) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }

	  break;

	case OpCode._BOUND:
	  if(minLength > 0) {
	    ++dontTry;
	    --endOffset;
	  }

	  if(__currentOffset != beginOffset) {
	    ch = __input[__currentOffset - 1];
	    tmp = OpCode._isWordCharacter(ch);
	  } else
	    tmp = OpCode._isWordCharacter(__previousChar);

	  while(__currentOffset < endOffset) {
	    ch = __input[__currentOffset];
	    if(tmp != OpCode._isWordCharacter(ch)){
	      tmp = !tmp;
	      if(__tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      }
	    }
	    ++__currentOffset;
	  }

	  if((minLength > 0 || tmp) && 
	     __tryExpression(__currentOffset)) {
	    success = true;
	    break _mainLoop;
	  }
	  break;

	case OpCode._NBOUND:
	  if(minLength > 0) {
	    ++dontTry;
	    --endOffset;
	  }

	  if(__currentOffset != beginOffset) {
	    ch = __input[__currentOffset - 1];
	    tmp = OpCode._isWordCharacter(ch);
	  } else
	    tmp = OpCode._isWordCharacter(__previousChar);

	  while(__currentOffset < endOffset) {
	    ch = __input[__currentOffset];
	    if(tmp != OpCode._isWordCharacter(ch))
	      tmp = !tmp;
	    else if(__tryExpression(__currentOffset)) {
	      success = true;
	      break _mainLoop;
	    }

	    ++__currentOffset;
	  }

	  if((minLength > 0 || !tmp) &&
	     __tryExpression(__currentOffset)) {
	    success = true;
	    break _mainLoop;
	  }
	  break;

	case OpCode._ALNUM:
	  while(__currentOffset < endOffset) {
	    ch = __input[__currentOffset];
	    if(OpCode._isWordCharacter(ch)) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }
	  break;

	case OpCode._NALNUM:
	  while(__currentOffset < endOffset) {
	    ch = __input[__currentOffset];
	    if(!OpCode._isWordCharacter(ch)) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }
	  break;

	case OpCode._SPACE:
	  while(__currentOffset < endOffset) {
	    if(Character.isWhitespace(__input[__currentOffset])) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }
	  break;

	case OpCode._NSPACE:
	  while(__currentOffset < endOffset) {
	    if(!Character.isWhitespace(__input[__currentOffset])) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }
	  break;

	case OpCode._DIGIT:
	  while(__currentOffset < endOffset) {
	    if(Character.isDigit(__input[__currentOffset])) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }
	  break;


	case OpCode._NDIGIT:
	  while(__currentOffset < endOffset) {
	    if(!Character.isDigit(__input[__currentOffset])) {
	      if(tmp && __tryExpression(__currentOffset)) {
		success = true;
		break _mainLoop;
	      } else
		tmp = doEvery;
	    } else
	      tmp = true;
	    ++__currentOffset;
	  }
	  break;
	} // end switch

      } else {
	if(minLength > 0)
	  dontTry = minLength - 1;
	endOffset-=dontTry;

	do {
	  if(__tryExpression(__currentOffset)) {
	    success = true;
	    break _mainLoop;
	  }
	} while(__currentOffset++ < endOffset);

      }


      break _mainLoop;
    } // end while

    __lastSuccess = success;
    __lastMatchResult = null;

    return success;
  }
  
  private boolean __matchUnicodeClass(char code, char __program[], 
			     int offset ,char opcode)
  {
    boolean isANYOF = ( opcode == OpCode._ANYOFUN );

    while( __program[offset] != OpCode._END ){
      if( __program[offset] == OpCode._RANGE ){
	offset++;
	if((code >= __program[offset]) && (code <= __program[offset+1])){
	  return isANYOF;
	} else {
	  offset+=2;
	}

      } else if(__program[offset] == OpCode._ONECHAR) {
       	offset++;
	if(__program[offset++] == code) return isANYOF;

      } else {
	isANYOF = (__program[offset] == OpCode._OPCODE) 
	  ? isANYOF : !isANYOF;

	offset++;
	switch ( __program[offset++] ) {
	case OpCode._ALNUM:
	  if(OpCode._isWordCharacter(code)) return isANYOF;
	  break;
	case OpCode._NALNUM:
	  if(!OpCode._isWordCharacter(code)) return isANYOF;
	  break;
	case OpCode._SPACE:
	  if(Character.isWhitespace(code)) return isANYOF;
	  break;
	case OpCode._NSPACE:
	  if(!Character.isWhitespace(code)) return isANYOF;
	  break;
	case OpCode._DIGIT:
	  if(Character.isDigit(code)) return isANYOF;
	  break;
	case OpCode._NDIGIT:
	  if(!Character.isDigit(code)) return isANYOF;
	  break;
	case OpCode._ALNUMC:
	  if(Character.isLetterOrDigit(code)) return isANYOF;
	  break;
	case OpCode._ALPHA:
	  if(Character.isLetter(code)) return isANYOF;
	  break;
	case OpCode._BLANK:
	  if(Character.isSpaceChar(code)) return isANYOF;
	  break;
	case OpCode._CNTRL:
	  if(Character.isISOControl(code)) return isANYOF;
	  break;
	case OpCode._LOWER:
	  if(Character.isLowerCase(code)) return isANYOF;
	  // Remove this hack after more efficient case-folding and unicode
	  // character classes are implemented
	  if(__caseInsensitive && Character.isUpperCase(code))
	    return isANYOF;
	  break;
	case OpCode._UPPER:
	  if(Character.isUpperCase(code)) return isANYOF;
	  // Remove this hack after more efficient case-folding and unicode
	  // character classes are implemented
	  if(__caseInsensitive && Character.isLowerCase(code))
	    return isANYOF;
	  break;
	case OpCode._PRINT:
	  if(Character.isSpaceChar(code)) return isANYOF;
          // Fall through to check if the character is alphanumeric,
	  // or a punctuation mark.  Printable characters are either
	  // alphanumeric, punctuation marks, or spaces.
	case OpCode._GRAPH:
	  if(Character.isLetterOrDigit(code)) return isANYOF;
          // Fall through to check if the character is a punctuation mark.
          // Graph characters are either alphanumeric or punctuation.
	case OpCode._PUNCT:
	  switch ( Character.getType(code) ) {
	    case Character.DASH_PUNCTUATION:
	    case Character.START_PUNCTUATION:
	    case Character.END_PUNCTUATION:
	    case Character.CONNECTOR_PUNCTUATION:
	    case Character.OTHER_PUNCTUATION:
            case Character.MATH_SYMBOL:
            case Character.CURRENCY_SYMBOL:
            case Character.MODIFIER_SYMBOL:
	      return isANYOF;
	    default:
	      break;
	    }
	  break;
	case OpCode._XDIGIT:
	  if( (code >= '0' && code <= '9') ||
	      (code >= 'a' && code <= 'f') ||
	      (code >= 'A' && code <= 'F')) return isANYOF;
	  break;
	case OpCode._ASCII:
	  if(code < 0x80)return isANYOF;
	}
      } 
    }
    return !isANYOF;
  }
  
  private boolean __tryExpression(int offset) {
    int count;
    
    __inputOffset = offset;
    __lastParen   = 0;
    __expSize     = 0;

    if(__numParentheses > 0) {
      for(count=0; count <= __numParentheses; count++) {
	__beginMatchOffsets[count] = OpCode._NULL_OFFSET;
	__endMatchOffsets[count]   = OpCode._NULL_OFFSET;
      }
    }

    if(__match(1)){
      __beginMatchOffsets[0] = offset;
      __endMatchOffsets[0]   = __inputOffset;
      return true;
    }

    return false;
  }
    

  private int __repeat(int offset, int max) {
    int scan, eol, operand, ret;
    char ch;
    char op;

    scan = __inputOffset;
    eol  = __eol;

    if(max != Character.MAX_VALUE && max < eol - scan)
      eol = scan + max;

    operand = OpCode._getOperand(offset);

    switch(op = __program[offset]) {

    case OpCode._ANY:
      while(scan < eol && __input[scan] != '\n')
	++scan;
      break;

    case OpCode._SANY:
      scan = eol;
      break;

    case OpCode._EXACTLY:
      ++operand;
      while(scan < eol && __program[operand] == __input[scan])
	++scan;
      break;

    case OpCode._ANYOF:
      if(scan < eol && (ch = __input[scan]) < 256) {
	while((ch < 256  ) && (__program[operand + (ch >> 4)] & (1 << (ch & 0xf))) == 0) {
	  if(++scan < eol)
	    ch = __input[scan];
	  else
	    break;
	}
      }
      break;

    case OpCode._ANYOFUN:
    case OpCode._NANYOFUN:
      if(scan < eol) {
	ch = __input[scan];
	while(__matchUnicodeClass(ch, __program, operand, op)){
	  if(++scan < eol)
	    ch = __input[scan];
	  else
	    break;
	}
      }
      break;

    case OpCode._ALNUM:
      while(scan < eol && OpCode._isWordCharacter(__input[scan]))
	++scan;
      break;

    case OpCode._NALNUM:
      while(scan < eol && !OpCode._isWordCharacter(__input[scan]))
	++scan;
      break;

    case OpCode._SPACE:
      while(scan < eol && Character.isWhitespace(__input[scan]))
	++scan;
      break;

    case OpCode._NSPACE:
      while(scan < eol && !Character.isWhitespace(__input[scan]))
	++scan;
      break;

    case OpCode._DIGIT:
      while(scan < eol && Character.isDigit(__input[scan]))
	++scan;
      break;

    case OpCode._NDIGIT:
      while(scan < eol && !Character.isDigit(__input[scan]))
	++scan;
      break;

    default:
      break;

    }

    ret = scan - __inputOffset;
    __inputOffset = scan;

    return ret;
  }


  private boolean __match(int offset) {
    char nextChar, op;
    int scan, next, input, maxScan, current, line, arg;
    boolean inputRemains = true, minMod = false;
    Perl5Repetition rep;


    input    = __inputOffset;
    inputRemains = (input < __endOffset);
    nextChar = (inputRemains ? __input[input] : __EOS);

    scan     = offset;
    maxScan  = __program.length;

    while(scan < maxScan /*&& scan > 0*/){
      next = OpCode._getNext(__program, scan);

      switch(op = __program[scan]) {

      case OpCode._BOL:
	if(input == __bol ? __previousChar == '\n' :
	   (__multiline && (inputRemains || input < __eol) && 
	    __input[input - 1] == '\n'))
	  break;
	return false;

      case OpCode._MBOL:
	if(input == __bol ? __previousChar == '\n' :
	   ((inputRemains || input < __eol) && __input[input - 1] == '\n'))
	  break;
	return false;

      case OpCode._SBOL:
	if(input == __bol && __previousChar == '\n')
	  break;
	return false;

      case OpCode._GBOL:
	if(input == __bol)
	  break;
	return true;

      case OpCode._EOL :
	if((inputRemains || input < __eol) && nextChar != '\n')
	  return false;
	if(!__multiline && __eol - input > 1)
	  return false;
	break;

      case OpCode._MEOL:
	if((inputRemains || input < __eol) && nextChar != '\n')
	  return false;
	break;

      case OpCode._SEOL:
	if((inputRemains || input < __eol) && nextChar != '\n')
	  return false;
	if(__eol - input > 1)
	  return false;
	break;

      case OpCode._SANY:
	if(!inputRemains && input >= __eol)
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._ANY:
	if((!inputRemains && input >= __eol) || nextChar == '\n')
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._EXACTLY:
	current = OpCode._getOperand(scan);
	line = __program[current++];

	if(__program[current] != nextChar)
	  return false;
	if(__eol - input < line)
	  return false;

	if(line > 1 && !__compare(__program, current, __input, input, line))
	  return false;

	input+=line;
	inputRemains = (input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._ANYOF:
	current = OpCode._getOperand(scan);

	if(nextChar == __EOS && inputRemains)
	  nextChar = __input[input];

	if(nextChar >= 256 || (__program[current + (nextChar >> 4)] &
	    (1 << (nextChar & 0xf))) != 0)
	  return false;

	if(!inputRemains && input >= __eol)
	  return false;

	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._ANYOFUN:
      case OpCode._NANYOFUN:
	current = OpCode._getOperand(scan);

	if(nextChar == __EOS && inputRemains)
	  nextChar = __input[input];

	if(!__matchUnicodeClass(nextChar, __program, current, op))
	  return false;

	if(!inputRemains && input >= __eol)
	  return false;

	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._ALNUM:
	if(!inputRemains)
	  return false;
	if(!OpCode._isWordCharacter(nextChar))
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._NALNUM:
	if(!inputRemains && input >= __eol)
	  return false;
	if(OpCode._isWordCharacter(nextChar))
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;


      case OpCode._NBOUND:
      case OpCode._BOUND:
	boolean a, b;

	if(input == __bol)
	  a = OpCode._isWordCharacter(__previousChar);
	else
	  a = OpCode._isWordCharacter(__input[input - 1]);

	b = OpCode._isWordCharacter(nextChar);

	if((a == b) == (__program[scan] == OpCode._BOUND))
	  return false;
	break;

      case OpCode._SPACE:
	if(!inputRemains && input >= __eol)
	  return false;
	if(!Character.isWhitespace(nextChar))
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;


      case OpCode._NSPACE:
	if(!inputRemains)
	  return false;
	if(Character.isWhitespace(nextChar))
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._DIGIT:
	if(!Character.isDigit(nextChar))
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._NDIGIT:
	if(!inputRemains && input >= __eol)
	  return false;
	if(Character.isDigit(nextChar))
	  return false;
	inputRemains = (++input < __endOffset);
	nextChar = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._REF:
	arg = OpCode._getArg1(__program, scan);
	current = __beginMatchOffsets[arg];

	if(current == OpCode._NULL_OFFSET)
	  return false;

	if(__endMatchOffsets[arg] == OpCode._NULL_OFFSET)
	  return false;

	if(current == __endMatchOffsets[arg])
	  break;

	if(__input[current] != nextChar)
	  return false;

	line = __endMatchOffsets[arg] - current;

	if(input + line > __eol)
	  return false;

	if(line > 1 && !__compare(__input, current, __input, input, line))
	  return false;

	input+=line;
	inputRemains = (input < __endOffset);
	nextChar     = (inputRemains ? __input[input] : __EOS);
	break;

      case OpCode._NOTHING:
	break;

      case OpCode._BACK:
	break;

      case OpCode._OPEN:
	arg = OpCode._getArg1(__program, scan);
	__beginMatchOffsets[arg] = input;

	if(arg > __expSize)
	  __expSize = arg;
	break;

      case OpCode._CLOSE:
	arg = OpCode._getArg1(__program, scan);
	__endMatchOffsets[arg] = input;

	if(arg > __lastParen)
	  __lastParen = arg;
	break;

      case OpCode._CURLYX:
	rep = new Perl5Repetition();
	rep._lastRepetition = __currentRep;
	__currentRep = rep;

	rep._parenFloor = __lastParen;
	rep._numInstances = -1;
	rep._min    = OpCode._getArg1(__program, scan);
	rep._max    = OpCode._getArg2(__program, scan);
	rep._scan   = OpCode._getNextOperator(scan) + 2;
	rep._next   = next;
	rep._minMod = minMod;
	// Must initialize to -1 because if we initialize to 0 and are
	// at the beginning of the input the OpCode._WHILEM case will
	// not work right.
	rep._lastLocation = -1;
	__inputOffset = input;

	// use minMod as temporary
	minMod = __match(OpCode._getPrevOperator(next));

	// leave scope call not pertinent?
	__currentRep = rep._lastRepetition;
	return minMod;

      case OpCode._WHILEM:
	rep = __currentRep;

	arg = rep._numInstances + 1;
	__inputOffset = input;

	if(input == rep._lastLocation) {
	  __currentRep = rep._lastRepetition;
	  line = __currentRep._numInstances;
	  if(__match(rep._next))
	    return true;
	  __currentRep._numInstances = line;
	  __currentRep = rep;
	  return false;
	}

	if(arg < rep._min) {
	  rep._numInstances = arg;
	  rep._lastLocation = input;
	  if(__match(rep._scan))
	    return true;
	  rep._numInstances = arg - 1;
	  return false;
	}

	if(rep._minMod) {
	  __currentRep = rep._lastRepetition;
	  line = __currentRep._numInstances;
	  if(__match(rep._next))
	    return true;
	  __currentRep._numInstances = line;
	  __currentRep = rep;

	  if(arg >= rep._max)
	    return false;

	  __inputOffset = input;
	  rep._numInstances = arg;
	  rep._lastLocation = input;

	  if(__match(rep._scan))
	    return true;

	  rep._numInstances = arg - 1;
	  return false;
	}

	if(arg < rep._max) {
	  __pushState(rep._parenFloor);
	  rep._numInstances = arg;
	  rep._lastLocation = input;
	  if(__match(rep._scan))
	    return true;
	  __popState();
	  __inputOffset = input;
	}

	__currentRep = rep._lastRepetition;
	line = __currentRep._numInstances;
	if(__match(rep._next))
	  return true;

	rep._numInstances = line;
	__currentRep = rep;
	rep._numInstances = arg - 1;
	return false;

      case OpCode._BRANCH:
	if(__program[next] != OpCode._BRANCH)
	  next = OpCode._getNextOperator(scan);
	else {
	  int lastParen;

	  lastParen = __lastParen;

	  do {

	    __inputOffset = input;

	    if(__match(OpCode._getNextOperator(scan)))
	      return true;

	    for(arg = __lastParen; arg > lastParen; --arg)
	      // __endMatchOffsets[arg] = 0;
	      __endMatchOffsets[arg] = OpCode._NULL_OFFSET;
	    __lastParen = arg;

	    scan = OpCode._getNext(__program, scan);
	  } while(scan != OpCode._NULL_OFFSET &&
		  __program[scan] == OpCode._BRANCH);
	  return false;
	}

	break;

      case OpCode._MINMOD:
	minMod = true;
	break;


      case OpCode._CURLY:
      case OpCode._STAR:
      case OpCode._PLUS:
	if(op == OpCode._CURLY) {
	  line = OpCode._getArg1(__program, scan);
	  arg  = OpCode._getArg2(__program, scan);
	  scan = OpCode._getNextOperator(scan) + 2;
	} else if(op == OpCode._STAR) {
	  line = 0;
	  arg  = Character.MAX_VALUE;
	  scan = OpCode._getNextOperator(scan);
	} else {
	  line = 1;
	  arg  = Character.MAX_VALUE;
	  scan = OpCode._getNextOperator(scan);
	}

	if(__program[next] == OpCode._EXACTLY) {
	  nextChar = __program[OpCode._getOperand(next) + 1];
	  current  = 0;
	} else {
	  nextChar = __EOS;
	  current  = -1000;
	}
	__inputOffset = input;

	if(minMod) {
	  minMod = false;

	  if(line > 0 && __repeat(scan, line) < line)
	    return false;


	  while(arg >= line || (arg == Character.MAX_VALUE && line > 0)) {
	    // there may be a bug here with respect to
	    // __inputOffset >= __endOffset, but it seems to be right for
	    // now.  the issue is with __inputOffset being reset later.
	    // is this test really supposed to happen here?
	    if(current == -1000 || __inputOffset >= __endOffset ||
	       __input[__inputOffset] == nextChar) {
	      if(__match(next))
		return true;
	    }

	    __inputOffset = input + line;

	    if(__repeat(scan, 1) != 0) {
	      ++line;
	      __inputOffset = input + line;
	    } else
	      return false;
	  }

	} else {
	  arg = __repeat(scan, arg);

	  if(line < arg && OpCode._opType[__program[next]] == OpCode._EOL &&
	     ((!__multiline && __program[next] != OpCode._MEOL) ||
              __program[next] == OpCode._SEOL))
	    line = arg;

	  while(arg >= line) {
	    // there may be a bug here with respect to
	    // __inputOffset >= __endOffset, but it seems to be right for
	    // now.  the issue is with __inputOffset being reset later.
	    // is this test really supposed to happen here?
	    if(current == -1000 || __inputOffset >= __endOffset ||
	       __input[__inputOffset] == nextChar) {
	      if(__match(next))
		return true;
	    }

	    --arg;
	    __inputOffset = input + arg;
	  }
	}

	return false;

      case OpCode._SUCCEED:
      case OpCode._END:
	__inputOffset = input;
	// This enforces the rule that two consecutive matches cannot have
	// the same end offset.
	if(__inputOffset == __lastMatchInputEndOffset)
	  return false;
	return true;

      case OpCode._IFMATCH:
	__inputOffset = input;
	scan = OpCode._getNextOperator(scan);
	if(!__match(scan))
	  return false;
	break;

      case OpCode._UNLESSM:
	__inputOffset = input;
	scan = OpCode._getNextOperator(scan);
	if(__match(scan))
	  return false;
	break;


      default:
	// todo: Need to throw an exception here.

      } // end switch

      // scan = (next > 0 ? next : 0);
      scan = next;
    } // end while scan



    return false;
  }


  /**
   * Set whether or not subsequent calls to {@link #matches matches()}
   * or {@link #contains contains()} should treat the input as
   * consisting of multiple lines.  The default behavior is for 
   * input to be treated as consisting of multiple lines.  This method
   * should only be called if the Perl5Pattern used for a match was
   * compiled without either of the Perl5Compiler.MULTILINE_MASK or
   * Perl5Compiler.SINGLELINE_MASK flags, and you want to alter the
   * behavior of how the <b>^</b>, <b>$</b>, and <b>.</b> metacharacters are
   * interpreted on the fly.  The compilation options used when compiling
   * a pattern ALWAYS override the behavior specified by setMultiline().  See
   * {@link Perl5Compiler} for more details.
   * <p>
   * @param multiline  If set to true treats the input as consisting of
   *        multiple lines with respect to the <b>^</b> and <b>$</b> 
   *        metacharacters.  If set to false treats the input as consisting
   *        of a single line with respect to the <b>^</b> and <b>$</b> 
   *        metacharacters.
   */
  public void setMultiline(boolean multiline) {__multiline = multiline; }


  /**
   * @return True if the matcher is treating input as consisting of multiple
   *         lines with respect to the <b>^</b> and <b>$</b> metacharacters,
   *         false otherwise.
   */
  public boolean isMultiline() {return __multiline; }

  char[] _toLower(char[] input) {
    int current;
    char[] inp;
    // todo:
    // Certainly not the best way to do case insensitive matching.
    // Must definitely change this in some way, but for now we
    // do what Perl does and make a copy of the input, converting
    // it all to lowercase.  This is truly better handled in the
    // compilation phase.
    inp = new char[input.length];
    System.arraycopy(input, 0, inp, 0, input.length);
    input = inp;

    // todo: Need to inline toLowerCase()
    for(current = 0; current < input.length; current++)
      if(Character.isUpperCase(input[current]))
	input[current] = Character.toLowerCase(input[current]);

    return input;
  }


  /**
   * Determines if a prefix of a string (represented as a char[])
   * matches a given pattern, starting from a given offset into the string.
   * If a prefix of the string matches the pattern, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The char[] to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @param offset The offset at which to start searching for the prefix.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(char[] input, Pattern pattern, int offset) {
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;
    __originalInput = input;
    if(expression._isCaseInsensitive)
      input = _toLower(input);

    __initInterpreterGlobals(expression, input, 0, input.length, offset);

    __lastSuccess = __tryExpression(offset);
    __lastMatchResult = null;

    return __lastSuccess;
  }


  /**
   * Determines if a prefix of a string (represented as a char[])
   * matches a given pattern.
   * If a prefix of the string matches the pattern, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The char[] to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(char[] input, Pattern pattern) {
    return matchesPrefix(input, pattern, 0);
  }


  /**
   * Determines if a prefix of a string matches a given pattern.
   * If a prefix of the string matches the pattern, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The String to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(String input, Pattern pattern) {
    return matchesPrefix(input.toCharArray(), pattern, 0);
  }

  /**
   * Determines if a prefix of a PatternMatcherInput instance
   * matches a given pattern.  If there is a match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  Unlike the
   * {@link #contains(PatternMatcherInput, Pattern)}
   * method, the current offset of the PatternMatcherInput argument
   * is not updated.  However, unlike the
   * {@link #matches matches(PatternMatcherInput, Pattern)} method,
   * matchesPrefix() will start its search from the current offset
   * rather than the begin offset of the PatternMatcherInput.
   * <p>
   * This method is useful for certain common token identification tasks
   * that are made more difficult without this functionality.
   * <p>
   * @param input  The PatternMatcherInput to test for a prefix match.
   * @param pattern  The Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   */
  public boolean matchesPrefix(PatternMatcherInput input, Pattern pattern) {
    char[] inp;
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;

    __originalInput = input._originalBuffer;
    if(expression._isCaseInsensitive) {
      if(input._toLowerBuffer == null)
	input._toLowerBuffer = _toLower(__originalInput);
      inp = input._toLowerBuffer;
    } else
      inp = __originalInput;

    __initInterpreterGlobals(expression, inp, input._beginOffset,
			     input._endOffset, input._currentOffset);
    __lastSuccess = __tryExpression(input._currentOffset);
    __lastMatchResult = null;

    return __lastSuccess;
  }



  /**
   * Determines if a string (represented as a char[]) exactly 
   * matches a given pattern.  If
   * there is an exact match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  The pattern must be
   * a Perl5Pattern instance, otherwise a ClassCastException will
   * be thrown.  You are not required to, and indeed should NOT try to
   * (for performance reasons), catch a ClassCastException because it
   * will never be thrown as long as you use a Perl5Pattern as the pattern
   * parameter.
   * <p>
   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
   * your expression and a $ at the end of your expression in Perl5
   * and using the =~ operator, even though in many cases it will be
   * equivalent.  matches() literally looks for an exact match according
   * to the rules of Perl5 expression matching.  Therefore, if you have
   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
   * it will not produce an exact match.  But <em>foot|foo</em> will
   * produce an exact match for either <em>foot</em> or <em>foo</em>.
   * Remember, Perl5 regular expressions do not match the longest
   * possible match.  From the perlre manpage:
   * <blockquote>
   *   Alternatives are tried from left to right, so the first
   *   alternative found for which the entire expression matches,
   *   is the one that is chosen. This means that alternatives
   *   are not necessarily greedy. For example: when matching
   *   foo|foot against "barefoot", only the "foo" part will
   *   match, as that is the first alternative tried, and it
   *   successfully matches the target string.
   * </blockquote>
   * <p>
   * @param input  The char[] to test for an exact match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean matches(char[] input, Pattern pattern) {
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;
    __originalInput = input;
    if(expression._isCaseInsensitive)
      input = _toLower(input);

    __initInterpreterGlobals(expression, input, 0, input.length, 0);
    __lastSuccess = (__tryExpression(0) &&
		     __endMatchOffsets[0] == input.length);
    __lastMatchResult = null;

    return __lastSuccess;
  }


  /**
   * Determines if a string exactly matches a given pattern.  If
   * there is an exact match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  The pattern must be
   * a Perl5Pattern instance, otherwise a ClassCastException will
   * be thrown.  You are not required to, and indeed should NOT try to
   * (for performance reasons), catch a ClassCastException because it
   * will never be thrown as long as you use a Perl5Pattern as the pattern
   * parameter.
   * <p>
   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
   * your expression and a $ at the end of your expression in Perl5
   * and using the =~ operator, even though in many cases it will be
   * equivalent.  matches() literally looks for an exact match according
   * to the rules of Perl5 expression matching.  Therefore, if you have
   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
   * it will not produce an exact match.  But <em>foot|foo</em> will
   * produce an exact match for either <em>foot</em> or <em>foo</em>.
   * Remember, Perl5 regular expressions do not match the longest
   * possible match.  From the perlre manpage:
   * <blockquote>
   *   Alternatives are tried from left to right, so the first
   *   alternative found for which the entire expression matches,
   *   is the one that is chosen. This means that alternatives
   *   are not necessarily greedy. For example: when matching
   *   foo|foot against "barefoot", only the "foo" part will
   *   match, as that is the first alternative tried, and it
   *   successfully matches the target string.
   * </blockquote>
   * <p>
   * @param input  The String to test for an exact match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean matches(String input, Pattern pattern) {
    return matches(input.toCharArray(), pattern);
  }


  /**
   * Determines if the contents of a PatternMatcherInput instance
   * exactly matches a given pattern.  If
   * there is an exact match, a MatchResult instance
   * representing the match is made accesible via
   * {@link #getMatch()}.  Unlike the
   * {@link #contains(PatternMatcherInput, Pattern)}
   * method, the current offset of the PatternMatcherInput argument
   * is not updated.  You should remember that the region between
   * the begin (NOT the current) and end offsets of the PatternMatcherInput
   * will be tested for an exact match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * <b>Note:</b> matches() is not the same as sticking a ^ in front of
   * your expression and a $ at the end of your expression in Perl5
   * and using the =~ operator, even though in many cases it will be
   * equivalent.  matches() literally looks for an exact match according
   * to the rules of Perl5 expression matching.  Therefore, if you have
   * a pattern <em>foo|foot</em> and are matching the input <em>foot</em>
   * it will not produce an exact match.  But <em>foot|foo</em> will
   * produce an exact match for either <em>foot</em> or <em>foo</em>.
   * Remember, Perl5 regular expressions do not match the longest
   * possible match.  From the perlre manpage:
   * <blockquote>
   *   Alternatives are tried from left to right, so the first
   *   alternative found for which the entire expression matches,
   *   is the one that is chosen. This means that alternatives
   *   are not necessarily greedy. For example: when matching
   *   foo|foot against "barefoot", only the "foo" part will
   *   match, as that is the first alternative tried, and it
   *   successfully matches the target string.
   * </blockquote>
   * <p>
   * @param input  The PatternMatcherInput to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if input matches pattern, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean matches(PatternMatcherInput input, Pattern pattern) {
    char[] inp;
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;

    __originalInput = input._originalBuffer;
    if(expression._isCaseInsensitive) {
      if(input._toLowerBuffer == null)
	input._toLowerBuffer = _toLower(__originalInput);
      inp = input._toLowerBuffer;
    } else
      inp = __originalInput;

    __initInterpreterGlobals(expression, inp, input._beginOffset,
			     input._endOffset, input._beginOffset);

    __lastMatchResult = null;

    if(__tryExpression(input._beginOffset)) {
      if(__endMatchOffsets[0] == input._endOffset ||
	 input.length() == 0 || input._beginOffset == input._endOffset) {
	__lastSuccess = true;
	return true;
      }
    }

    __lastSuccess = false;

    return false;
  }



  /**
   * Determines if a string contains a pattern.  If the pattern is
   * matched by some substring of the input, a MatchResult instance
   * representing the <b> first </b> such match is made acessible via 
   * {@link #getMatch()}.  If you want to access
   * subsequent matches you should either use a PatternMatcherInput object
   * or use the offset information in the MatchResult to create a substring
   * representing the remaining input.  Using the MatchResult offset 
   * information is the recommended method of obtaining the parts of the
   * string preceeding the match and following the match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * @param input  The String to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean contains(String input, Pattern pattern) {
    return contains(input.toCharArray(), pattern);
  }


  /**
   * Determines if a string (represented as a char[]) contains a pattern.
   * If the pattern is
   * matched by some substring of the input, a MatchResult instance
   * representing the <b> first </b> such match is made acessible via 
   * {@link #getMatch()}.  If you want to access
   * subsequent matches you should either use a PatternMatcherInput object
   * or use the offset information in the MatchResult to create a substring
   * representing the remaining input.  Using the MatchResult offset 
   * information is the recommended method of obtaining the parts of the
   * string preceeding the match and following the match.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * @param input  The char[] to test for a match.
   * @param pattern  The Perl5Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean contains(char[] input, Pattern pattern) {
    Perl5Pattern expression;

    expression = (Perl5Pattern)pattern;

    __originalInput = input;

    if(expression._isCaseInsensitive)
      input = _toLower(input);

    return __interpret(expression, input, 0, input.length, 0);
  }


  private static final int __DEFAULT_LAST_MATCH_END_OFFSET = -100;
  private int __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;
  /**
   * Determines if the contents of a PatternMatcherInput, starting from the
   * current offset of the input contains a pattern.
   * If a pattern match is found, a MatchResult
   * instance representing the <b>first</b> such match is made acessible via 
   * {@link #getMatch()}.  The current offset of the
   * PatternMatcherInput is set to the offset corresponding to the end
   * of the match, so that a subsequent call to this method will continue
   * searching where the last call left off.  You should remember that the
   * region between the begin and end offsets of the PatternMatcherInput are
   * considered the input to be searched, and that the current offset
   * of the PatternMatcherInput reflects where a search will start from.
   * Matches extending beyond the end offset of the PatternMatcherInput
   * will not be matched.  In other words, a match must occur entirely
   * between the begin and end offsets of the input.  See
   * {@link PatternMatcherInput} for more details.
   * <p>
   * As a side effect, if a match is found, the PatternMatcherInput match
   * offset information is updated.  See the
   * {@link PatternMatcherInput#setMatchOffsets(int, int)}
   * method for more details.
   * <p>
   * The pattern must be a Perl5Pattern instance, otherwise a
   * ClassCastException will be thrown.  You are not required to, and 
   * indeed should NOT try to (for performance reasons), catch a
   * ClassCastException because it will never be thrown as long as you use
   * a Perl5Pattern as the pattern parameter.
   * <p>
   * This method is usually used in a loop as follows:
   * <blockquote><pre>
   * PatternMatcher matcher;
   * PatternCompiler compiler;
   * Pattern pattern;
   * PatternMatcherInput input;
   * MatchResult result;
   *
   * compiler = new Perl5Compiler();
   * matcher  = new Perl5Matcher();
   *
   * try {
   *   pattern = compiler.compile(somePatternString);
   * } catch(MalformedPatternException e) {
   *   System.err.println("Bad pattern.");
   *   System.err.println(e.toString());
   *   return;
   * }
   *
   * input   = new PatternMatcherInput(someStringInput);
   *
   * while(matcher.contains(input, pattern)) {
   *   result = matcher.getMatch();  
   *   // Perform whatever processing on the result you want.
   * }
   *
   * </pre></blockquote>
   * <p>
   * @param input  The PatternMatcherInput to test for a match.
   * @param pattern  The Pattern to be matched.
   * @return True if the input contains a pattern match, false otherwise.
   * @exception ClassCastException If a Pattern instance other than a
   *         Perl5Pattern is passed as the pattern parameter.
   */
  public boolean contains(PatternMatcherInput input, Pattern pattern) {
    char[] inp;
    Perl5Pattern expression;
    boolean matchFound;


    // if(input.length() > 0) {
    // We want to allow a null string to match at the end of the input
    // which is why we don't check endOfInput.  Not sure if this is a
    // safe thing to do or not.
    if(input._currentOffset > input._endOffset)
      return false;
    // }
    /* else 
      if(input._endOfInput())
	return false;
	*/
    expression = (Perl5Pattern)pattern;
    __originalInput = input._originalBuffer;

    // Todo:
    // Really should only reduce to lowercase that part of the
    // input that is necessary, instead of the whole thing.
    // Adjust MatchResult offsets accordingly.  Actually, pass an adjustment
    // value to __interpret.
    __originalInput = input._originalBuffer;
    if(expression._isCaseInsensitive) {
      if(input._toLowerBuffer == null)
	input._toLowerBuffer = _toLower(__originalInput);
      inp = input._toLowerBuffer;
    } else
      inp = __originalInput;

    __lastMatchInputEndOffset = input.getMatchEndOffset();

    matchFound =
      __interpret(expression, inp, input._beginOffset, input._endOffset,
		  input._currentOffset);

    if(matchFound) {
      input.setCurrentOffset(__endMatchOffsets[0]);
      input.setMatchOffsets(__beginMatchOffsets[0], __endMatchOffsets[0]);
    } else {
      input.setCurrentOffset(input._endOffset + 1);
    }

    // Restore so it doesn't interfere with other unrelated matches.
    __lastMatchInputEndOffset = __DEFAULT_LAST_MATCH_END_OFFSET;

    return matchFound;
  }


  /**
   * Fetches the last match found by a call to a matches() or contains()
   * method.  If you plan on modifying the original search input, you
   * must call this method BEFORE you modify the original search input,
   * as a lazy evaluation technique is used to create the MatchResult.
   * This reduces the cost of pattern matching when you don't care about
   * the actual match and only care if the pattern occurs in the input.
   * Otherwise, a MatchResult would be created for every match found,
   * whether or not the MatchResult was later used by a call to getMatch().
   * <p>
   * @return A MatchResult instance containing the pattern match found
   *         by the last call to any one of the matches() or contains()
   *         methods.  If no match was found by the last call, returns
   *         null. 
   */
  public MatchResult getMatch() {
    if(!__lastSuccess)
      return null;

    if(__lastMatchResult == null)
      __setLastMatchResult();

    return __lastMatchResult;
  }

}
